library(tidyverse)
library(terra)
library(sf)
library(leaflet)
library(scico)
library(flextable)Testing IHS
Map clustered data
To make fuzzy maps of each set of clusters, first import the FCM models generated in the previous step. Also load one of the input rasters, which can be used as a framework to re-spatialise the data. Doesn’t matter which.
model_files <-
list.files(file.path('models'), pattern = '\\.rds', full.names = TRUE)
models <- lapply(model_files, readRDS)
names(models) <- paste0('fcm_',
sprintf('%02d',
sapply(models, function(i) nrow(i[['centers']]))))
grid <- rast(file.path('data_spatial', 'processed_covariates', 'slope.tif'))The following function takes the outputs of a model and maps each set of cluster values onto a grid that matches the input covariates. Since the cases are arranged in cell order throughout the analysis, the spatial arrangement of the clusters can be reconstituted.
fuzzy_maps <- function(model = NULL, grid = NULL) {
# get indexes of cells that contain data
valcells <- which(!is.na(terra::values(grid)))
# get number of clusters - 1 cluster, 1 map
cen <- length(model$size)
# check that cases == cells or the output maps will be scrambled
cases <- nrow(model$membership)
if(length(valcells) != cases) {
stop("cluster cases doesn't match number of non-NA grid cells - check inputs")
}
out <- lapply(seq(cen), function(m) {
grid[valcells] <- model$membership[, m]
grid
})
names(out) <- paste0('Cluster_', sprintf('%02d', seq(cen)))
terra::rast(out)
}
# for example,
clmaps_04 <- fuzzy_maps(models[['fcm_04']], grid)So despite the poor validity statistics, we see some patterns that are recognisable, e.g. the steep areas appear to be highlighted in Cluster 3.
Harden maps
These maps can be hardened into binary surfaces at a set membership cut-off. A value of 0.6 is used below per the results of the sensitivity testing in (2013, sec. 3.4):
binary_maps <- function(model = NULL, grid = NULL, threshold = 0.6) {
# reclassify the membership matrix to 0/1 by threshold
mem_bin <- apply(model$membership, MARGIN = c(1,2), function(i) {
# don't use NA here, makes rep grade calc harder later
if(i < threshold) { 0 } else { 1 }
})
valcells <- which(!is.na(terra::values(grid)))
cen <- length(model$size)
cases <- nrow(mem_bin)
if(length(valcells) != cases) {
stop("cluster cases doesn't match number of non-NA grid cells - check inputs")
}
out <- lapply(seq(cen), function(m) {
grid[valcells] <- mem_bin[, m]
grid
})
names(out) <- paste0('Cluster_', sprintf('%02d', seq(cen)))
terra::rast(out)
}
binmaps_04 <- binary_maps(models[['fcm_04']], grid, 0.6)Map representative grade
The ‘representative grade’ can now be calculated - the number of times each pixel appears in a cluster (with a sufficiently high membership each time) across multiple c.
using c = 4,5,6,7,8 for now
# already did c = 4, so
clmaps_05 <- fuzzy_maps(models[['fcm_05']], grid)
clmaps_06 <- fuzzy_maps(models[['fcm_06']], grid)
clmaps_07 <- fuzzy_maps(models[['fcm_07']], grid)
clmaps_08 <- fuzzy_maps(models[['fcm_08']], grid)
binmaps_05 <- binary_maps(models[['fcm_05']], grid, 0.6)
binmaps_06 <- binary_maps(models[['fcm_06']], grid, 0.6)
binmaps_07 <- binary_maps(models[['fcm_07']], grid, 0.6)
binmaps_08 <- binary_maps(models[['fcm_08']], grid, 0.6)Representative grade is a simple overlay-and-sum operation using the hardened cluster maps.
rep_04 <- sum(binmaps_04) # maps whether cells were in any cluster
rep_05 <- sum(binmaps_05)
rep_06 <- sum(binmaps_06)
rep_07 <- sum(binmaps_07)
rep_08 <- sum(binmaps_08)
representative_grade <- sum(c(rep_05, rep_05, rep_06, rep_07, rep_08))Determine environmental cluster chains
The ‘environmental cluster chains’ for each pixel now need to be determined. This effectively assigns a cluster/iteration signature, e.g. a pixel representative of class 1 in the 3-cluster surface and class 3 in the 5-cluster surface would have a label like ‘1-0-3-0-0’.
Each of these cluster chains will occupy a set number of pixels. Yang et al. (2013) propose that the chains that have both a high representative grade and a large area should be targeted first for sampling, and that pixels with a high average membership within those areas are the best places to go.
Firstly, the binary maps need to be recoded so that they have a cluster number - so e.g. the map for cluster 3 in the c = 4 solution takes values of 3/0 instead of 1/0
renumber_binmaps <- function(binmaps = NULL) {
nl <- terra::nlyr(binmaps)
for(i in seq(nl)) { binmaps[[i]][binmaps[[i]] == 1] <- i }
binmaps
}
binmaps_04_rn <- renumber_binmaps(binmaps_04)
binmaps_05_rn <- renumber_binmaps(binmaps_05)
binmaps_06_rn <- renumber_binmaps(binmaps_06)
binmaps_07_rn <- renumber_binmaps(binmaps_07)
binmaps_08_rn <- renumber_binmaps(binmaps_08)Next, stack all of the input clustering layers together, grouped by c using sum().
all_clusters <- rast(list(sum(binmaps_04_rn),
sum(binmaps_05_rn),
sum(binmaps_06_rn),
sum(binmaps_07_rn),
sum(binmaps_08_rn)))
names(all_clusters) <- paste0('fcm_', sprintf('%02d', 4:8))This allows us to extract all of the ECC chain data to a table with
get_clusterchains <- function(clmap = NULL) {
as.data.frame(clmap) %>%
# create ecc name and calc rep grade and a UID
mutate(ecc_name = apply(., MARGIN = 1, function(i) {
if(all(is.na(i))) { return(NA_character_) }
paste0(i, collapse = '-')
}),
rep_grade = rowSums(. != 0)) %>%
# calc rep area
group_by(across(everything())) %>%
summarise(rep_area = n()) %>%
ungroup() %>%
mutate(across(-ecc_name, na_if, 0)) %>%
filter(rep_grade > 0) %>%
arrange(desc(rep_grade), desc(rep_area)) %>%
# will need later...
mutate(uid = row_number())
}
all_eccs <- get_clusterchains(all_clusters)And map out the location of each chain.
# this needs some work
map_clusterchains <- function(clmap = NULL, eccs = NULL,
id_field = NULL, grid = NULL) {
dat <- as.data.frame(clmap) %>%
mutate(ecc_name = apply(., MARGIN = 1, function(i) {
if(all(is.na(i))) { return(NA_character_) }
paste0(i, collapse = '-')
})) %>%
left_join(., eccs[, c('ecc_name', id_field)], by = 'ecc_name')
grid[which(!is.na(values(grid)))] <- dat[[id_field]]
grid
}
ecc_map <- map_clusterchains(all_clusters, all_eccs, 'uid', grid)Next, tag the chains Yang et al. (2013) regard as redundant, in that they are ‘contained’ in a chain with a higher representative grade (e.g. 0-2-0-0-8 would be redundant to sample if 2-2-2-2-8 was already targeted).
# https://stackoverflow.com/questions/63806865/flagging-redundant-rows-with-na/
# if its stupid and it works, its not stupid >.>
tag_redundant <- function(dat = NULL, n_c = NULL) {
na_count <- rowSums(is.na(dat[, 1:n_c]))
strings <- apply(dat[, 1:n_c], MARGIN = 1, function(row) {
row[is.na(row)] <- '.'
paste0(row, collapse ='')
})
dat$is_unique <-
sapply(seq_along(strings), function(i) {
if(na_count[i] == 0) { return(TRUE) } # no redundancy where rg == max rg
test_targets <- strings[na_count <= na_count[i]]
test_targets <- test_targets[!test_targets %in% strings[i]]
!any(grepl(strings[i], test_targets))
})
dat
}
all_eccs <- tag_redundant(all_eccs, 5)ecc_name | rep_grade | rep_area | uid | is_unique |
2-2-2-2-8 | 5 | 1,107 | 1 | TRUE |
3-3-6-6-6 | 5 | 275 | 2 | TRUE |
1-1-5-1-1 | 5 | 134 | 3 | TRUE |
1-5-5-1-5 | 5 | 113 | 4 | TRUE |
2-1-1-7-2 | 5 | 80 | 5 | TRUE |
1-1-1-7-1 | 5 | 37 | 6 | TRUE |
4-4-3-5-4 | 5 | 10 | 7 | TRUE |
1-1-1-7-2 | 5 | 1 | 8 | TRUE |
2-0-1-7-2 | 4 | 382 | 9 | FALSE |
0-1-1-7-2 | 4 | 340 | 10 | FALSE |
This signifies that 21 ECCs are unique. Of those, only 9 have a representative area of > 20 cells (in this case, that’s 0.2 ha). These are the areas in which sample points should be chosen. This is the next step, which requires the following data.
if(!dir.exists(file.path('sample_plan'))) {
dir.create(file.path('sample_plan'))
}
if(!dir.exists(file.path('data_spatial', 'model_outputs'))) {
dir.create(file.path('data_spatial', 'model_outputs'))
}
write_csv(all_eccs, file.path('sample_plan', 'ECC_table.csv'))
writeRaster(ecc_map,
file.path('data_spatial', 'model_outputs', 'ecc_map.tif'),
overwrite = TRUE,
datatype = 'FLT4S',
gdal = "COMPRESS=LZW")
writeRaster(all_clusters,
file.path('data_spatial', 'model_outputs', 'all_clusters.tif'),
overwrite = TRUE,
datatype = 'FLT4S',
gdal = "COMPRESS=LZW")
# need all the membership data for picking sites too,
purrr::map2(
list(clmaps_04, clmaps_05, clmaps_06, clmaps_07, clmaps_08), 4:8,
function(i, j) {
outnm <- paste0('cluster_', sprintf('%02d', j), '_membership.tif')
writeRaster(i,
file.path('data_spatial', 'model_outputs', outnm),
overwrite = TRUE,
datatype = 'FLT4S',
gdal = "COMPRESS=LZW")
})